---
title: "Old Patronage during the New Deal"
subtitle: "Replication File: Data Assembly (1/2)"
output: html_document
author: "Stephanie Ternullo, Simon Y. Shachter"
date: "2024-01-04"
---

```{r}

library( haven )
library( tidyverse )
library( readxl )

# Function interpolate
# x = earlier year
# y = later year
# z = number of years between earlier year and point of interpolation
# b = number of years in between earlier year (x) and later year (y)
interpolate <- function( x, y, z, b ) {
  ( y * ( ( 365.2 * z ) / ( 365.2 * b ) ) ) +
    ( x * ( ( ( 365.2 * b ) - ( 365.2 * z ) ) / ( 365.2 * b ) ) )
}

```

<i>This replication file is the first of two replication files. It imports, cleans, and merges data that is then displayed in tables and figures in the accompanying replication file.</i>

# State-Level Data

## Machines & Cities data

```{r}

# ICPSR crosswalk to merge location data
icpsr_merge <- read_csv( "data_to_import/icpsr_merge.csv" ) %>%
  filter( name != "INDY SUBS" )

# Author-made data of machine cities and their years
machine_cities <- read_excel( "data_to_import/machine_cities.xlsx" ) %>%
  mutate( fips = as.numeric( fips ) )

# All cities with population > 30,000 in 1931
# (from Census "Statistics of Cities Having a Population over 30,000 1931")
# modified by authors to capture machine status of the cities.
all_cities_1931 <- read_excel( "data_to_import/1931_finstat_cities_30k.xlsx" ) %>%
  filter( !is.na( city ) ) %>%
  mutate_each( funs( toupper ), city ) %>%
  mutate( city_1931 = 1 ) %>%
  # Note: Some counties have lots of small cities (ex. Middlesex has the Boston suburbs)
  distinct( fips, .keep_all = TRUE )

```


## State Spending Data

```{r}

# State population data from 1920-1940, interpolating year-by-year
state_pop1920 <- read_csv( "data_to_import/state_pop1920.csv" ) %>%
  dplyr::select( state_pop1920 = SE_T001_001, Geo_name ) 

state_pop1930 <- read_csv( "data_to_import/state_pop1930.csv" ) %>%
  dplyr::select( stpc = Geo_STUSAB, state_pop1930 = SE_T001_001, Geo_name)  

state_pop20_40 <- read_csv( "data_to_import/state_pop1940.csv" ) %>%
  dplyr::select( Geo_name = Geo_Name, state_pop1940 = SE_T001_001 ) %>%
  left_join( state_pop1930, by = "Geo_name" ) %>%
  left_join( state_pop1920, by = "Geo_name" ) %>%
  mutate( state_pop1921 = interpolate(state_pop1920, state_pop1930, 1, 10),
          state_pop1922 = interpolate(state_pop1920, state_pop1930, 2, 10),
          state_pop1923 = interpolate(state_pop1920, state_pop1930, 3, 10),
          state_pop1924 = interpolate(state_pop1920, state_pop1930, 4, 10),
          state_pop1925 = interpolate(state_pop1920, state_pop1930, 5, 10),
          state_pop1926 = interpolate(state_pop1920, state_pop1930, 6, 10),
          state_pop1927 = interpolate(state_pop1920, state_pop1930, 7, 10),
          state_pop1928 = interpolate(state_pop1920, state_pop1930, 8, 10),
          state_pop1929 = interpolate(state_pop1920, state_pop1930, 9, 10),
          state_pop1931 = interpolate(state_pop1930, state_pop1940, 1, 10),
          state_pop1932 = interpolate(state_pop1930, state_pop1940, 2, 10),
          state_pop1933 = interpolate(state_pop1930, state_pop1940, 3, 10),
          state_pop1934 = interpolate(state_pop1930, state_pop1940, 4, 10),
          state_pop1935 = interpolate(state_pop1930, state_pop1940, 5, 10),
          state_pop1936 = interpolate(state_pop1930, state_pop1940, 6, 10),
          state_pop1937 = interpolate(state_pop1930, state_pop1940, 7, 10),
          state_pop1938 = interpolate(state_pop1930, state_pop1940, 8, 10),
          state_pop1939 = interpolate(state_pop1930, state_pop1940, 9, 10)) %>%
  gather( "variable", "state_pop", -Geo_name, -stpc ) %>%
  separate( variable, into = c( "var", "year" ), -4, convert = TRUE )

# State-by-year spending data from Fishback and Kantor
# Creating per capita numbers
# See Fishback and Kantor for abbreviations.
state_spending_24_40 <- read_dta( "data_to_import/states_30_40.dta" ) %>%
  filter( year >= 1924 & year <= 1940 ) %>%
  mutate( tot_grants = sgva + sgff + sgpra + sgwpa + sgspecf + sgstmar + sgfwp + sgaaa +
            sgfsa + sgscs + sglut + sgaes + sgaew + sgcam + sgfs + sgfs + sgccc +
            sgburec + sgpba + sgpwanf2 + sgpwaf + sgusha + sgfera + sgcwa + sgssa +
            sgrh + sgminl + sgvoced + sgoed + sgusemp + sgsold + sgfscc + sgng + sgbra +
            sgftasm + sgwilres + sgaznmsc + sghlthhy + sgothed + sgphs + sgpband +
            sgfwaoff + sgoeddw,
          # Work and direct relief spending
          tot_spending = sgpwaf + sgpra + sgpba + sgwpa + sgssa + sgfera + sgcwa + sgusha +
            sgccc + sginds + sgburec + sgrh + sgwilres + sgpband + tvaspend + sgff + sgfwp +
            sgfs,
          works_grants = sgfera + sgwpa + sgcwa,
          cwa_fera = sgfera + sgcwa,
          # Programs in city measures
          tot_spending_city = sgwpa + sgfera + sgcwa + sgssa ) %>%
  dplyr::select( year, statenm, sticpsr = statecode, tot_grants, tot_spending,
                 tot_spending_city, works_grants, sgwpa, sgccc, sgfera, sgcwa, sgssa,
                 sgpra, sgpba, sgpwaf, sgusha, sgusemp, cwa_fera ) %>%
  left_join( dplyr::select( icpsr_merge, sticpsr, stpc ), by = "sticpsr" ) %>%
  left_join( state_pop20_40, by = c( "stpc", "year" ) ) %>%
  # Per capita measures of new deal spending
  mutate( spend_pc = tot_spending/state_pop,
          grants_pc = tot_grants/state_pop,
          works_grants_pc = works_grants/state_pop,
          cwa_fera_pc = cwa_fera/state_pop,
          spend_pc_city = tot_spending_city/state_pop,
          wpa_pc = sgwpa/state_pop,
          ccc_pc = sgccc/state_pop,
          fera_pc = sgfera/state_pop,
          cwa_pc = sgcwa/state_pop,
          ssa_pc = sgssa/state_pop,
          pra_pc = sgpra/state_pop,
          pba_pc = sgpba/state_pop,
          waf_pc = sgpwaf/state_pop,
          usha_pc = sgusha/state_pop,
          usemp_pc = sgusemp/state_pop ) %>%
  distinct( stpc, year, .keep_all = TRUE )

```


## Demographic Data

```{r}

# Interpolating demography data to be year-by-year for 1932-1940
demog1940 <- read_csv( "data_to_import/soc_exp1940.csv" ) %>%
  dplyr::select( fips = Geo_FIPS,
                 tot_pop1940 = SE_T001_001,
                 density1940 = SE_T003_001,
                 black_pct1940 = PCT_SE_T009_003,
                 femal_pct1940 = PCT_SE_T004_003,
                 forgn_white_pop1940 = SE_T064_001,
                 pop_651940 = PCT_SE_T011B014,
                 white_pct1940 = PCT_SE_T009_002,
                 unemp_rate1940 = PCT_SE_T031_003 ) %>%
  mutate( forgn_white_pct1940 = ( forgn_white_pop1940 / tot_pop1940 ) * 100 ) %>%
  mutate( fips = as.numeric( fips ) )

merge_file <- read_csv( "data_to_import/soc_exp1940.csv" ) %>%
  dplyr::select( Geo_QName, fips = Geo_FIPS )

demog1930 <- read_csv( "data_to_import/soc_exp1930.csv" ) %>%
  left_join( merge_file, by = "Geo_QName" ) %>%
  dplyr::select( fips, name = Geo_QName, tot_pop1930 = SE_T001_001,
                 density1930 = SE_A001_001,
                 black_pct1930 = PCT_SE_T028_003,  
                 femal_pct1930 = PCT_SE_T005_003,
                 pop_651930 = PCT_SE_T015B012,
                 white_pct1930 = PCT_SE_T028_002,
                 unemp_rate1930 = PCT_SE_T201_002,
                 forgn_white_pop1930 = SE_T008_001 ) %>%
  mutate( forgn_white_pct1930 = ( forgn_white_pop1930 / tot_pop1930 ) * 100 ) %>%
  mutate( fips = as.numeric( fips ) )

demog_1932_1940 <-  left_join( demog1930, demog1940, by = "fips" ) %>% 
  mutate( "unemp_rate1932" = interpolate( unemp_rate1930, unemp_rate1940, 2, 10 ),
          "tot_pop1932" = interpolate( tot_pop1930, tot_pop1940, 2, 10 ),
          "pop_651932" = interpolate( pop_651930, pop_651940, 2, 10 ),
          "white_pct1932" = interpolate( white_pct1930, white_pct1940, 2, 10 ),
          "black_pct1932" = interpolate( black_pct1930, black_pct1940, 2, 10 ),
          "femal_pct1932" = interpolate( femal_pct1930, femal_pct1940, 2, 10 ),
          "forgn_white_pct1932" = interpolate( forgn_white_pct1930, forgn_white_pct1940,
                                               2, 10 ),
          "density1932" = interpolate( density1930, density1940, 2, 10 ) ) %>%
  mutate( "unemp_rate1933" = interpolate( unemp_rate1930, unemp_rate1940, 3, 10 ),
          "tot_pop1933" = interpolate( tot_pop1930, tot_pop1940, 3, 10 ),
          "pop_651933" = interpolate( pop_651930, pop_651940, 3, 10 ),
          "white_pct1933" = interpolate( white_pct1930, white_pct1940, 3, 10 ),
          "black_pct1933" = interpolate( black_pct1930, black_pct1940, 3, 10 ),
          "femal_pct1933" = interpolate( femal_pct1930, femal_pct1940, 3, 10 ),
          "forgn_white_pct1933" = interpolate( forgn_white_pct1930, forgn_white_pct1940,
                                               3, 10 ),
         "density1933" = interpolate(density1930, density1940, 3, 10 ) ) %>%
  mutate( "unemp_rate1934" = interpolate( unemp_rate1930, unemp_rate1940, 4, 10 ),
          "tot_pop1934" = interpolate( tot_pop1930, tot_pop1940, 4, 10 ),
          "pop_651934" = interpolate( pop_651930, pop_651940, 4, 10 ),
          "white_pct1934" = interpolate( white_pct1930, white_pct1940, 4, 10 ),
          "black_pct1934" = interpolate( black_pct1930, black_pct1940, 4, 10 ),
          "femal_pct1934" = interpolate( femal_pct1930, femal_pct1940, 4, 10 ),
          "forgn_white_pct1934" = interpolate( forgn_white_pct1930, forgn_white_pct1940,
                                               4, 10 ),
          "density1934" = interpolate( density1930, density1940, 4, 10 ) ) %>%
  mutate( "unemp_rate1935" = interpolate( unemp_rate1930, unemp_rate1940, 5, 10 ),
          "tot_pop1935" = interpolate( tot_pop1930, tot_pop1940, 5, 10 ),
          "pop_651935" = interpolate( pop_651930, pop_651940, 5, 10 ),
          "white_pct1935" = interpolate( white_pct1930, white_pct1940, 5, 10 ),
          "black_pct1935" = interpolate( black_pct1930, black_pct1940, 5, 10 ),
          "femal_pct1935" = interpolate( femal_pct1930, femal_pct1940, 5, 10 ),
          "forgn_white_pct1935" = interpolate( forgn_white_pct1930, forgn_white_pct1940,
                                               5, 10 ),
          "density1935" = interpolate( density1930, density1940, 5, 10 ) ) %>%
  mutate( "unemp_rate1936" = interpolate( unemp_rate1930, unemp_rate1940, 6, 10 ),
          "tot_pop1936" = interpolate( tot_pop1930, tot_pop1940, 6, 10 ),
          "pop_651936" = interpolate( pop_651930, pop_651940, 6, 10 ),
          "white_pct1936" = interpolate( white_pct1930, white_pct1940, 6, 10 ),
          "black_pct1936" = interpolate( black_pct1930, black_pct1940, 6, 10 ),
          "femal_pct1936" = interpolate( femal_pct1930, femal_pct1940, 6, 10 ),
          "forgn_white_pct1936" = interpolate( forgn_white_pct1930, forgn_white_pct1940,
                                               6, 10 ),
          "density1936" = interpolate( density1930, density1940, 6, 10)) %>%
  mutate( "unemp_rate1937" = interpolate( unemp_rate1930, unemp_rate1940, 7, 10 ),
          "tot_pop1937" = interpolate( tot_pop1930, tot_pop1940, 7, 10 ),
          "pop_651937" = interpolate( pop_651930, pop_651940, 7, 10 ),
          "white_pct1937" = interpolate( white_pct1930, white_pct1940, 7, 10 ),
          "black_pct1937" = interpolate( black_pct1930, black_pct1940, 7, 10 ),
          "femal_pct1937" = interpolate( femal_pct1930, femal_pct1940, 7, 10 ),
          "forgn_white_pct1937" = interpolate( forgn_white_pct1930, forgn_white_pct1940,
                                               7, 10 ),
          "density1937" = interpolate( density1930, density1940, 7, 10)) %>%
  mutate( "unemp_rate1938" = interpolate( unemp_rate1930, unemp_rate1940, 8, 10 ),
          "tot_pop1938" = interpolate( tot_pop1930, tot_pop1940, 8, 10 ),
          "pop_651938" = interpolate( pop_651930, pop_651940, 8, 10 ),
          "white_pct1938" = interpolate( white_pct1930, white_pct1940, 8, 10 ),
          "black_pct1938" = interpolate( black_pct1930, black_pct1940, 8, 10 ),
          "femal_pct1938" = interpolate( femal_pct1930, femal_pct1940, 8, 10 ),
          "forgn_white_pct1938" = interpolate( forgn_white_pct1930, forgn_white_pct1940,
                                               8, 10 ),
          "density1938" = interpolate( density1930, density1940, 8, 10 ) ) %>%
  mutate( "unemp_rate1939" = interpolate( unemp_rate1930, unemp_rate1940, 9, 10 ),
          "tot_pop1939" = interpolate( tot_pop1930, tot_pop1940, 9, 10 ),
          "pop_651939" = interpolate( pop_651930, pop_651940, 9, 10 ),
          "white_pct1939" = interpolate( white_pct1930, white_pct1940, 9, 10 ),
          "black_pct1939" = interpolate( black_pct1930, black_pct1940, 9, 10 ),
          "femal_pct1939" = interpolate( femal_pct1930, femal_pct1940, 9, 10 ),
          "forgn_white_pct1939" = interpolate( forgn_white_pct1930, forgn_white_pct1940,
                                               9, 10 ),
          "density1939" = interpolate( density1930, density1940, 9, 10 ) ) %>%
  distinct( fips, .keep_all = TRUE ) %>% # Removes Milton County, GA which dissolved in 1931, lacks fips
  gather( key = "var_yr", value = "value", -fips, -name ) %>%
  separate( var_yr, into = c( "variable", "year" ), sep = -4, convert = TRUE ) %>%
  spread( variable, value ) %>%
  mutate( fips = as.numeric( fips ) ) %>%
  filter( year >= 1932 ) # Removes 1930 observations

```

## Presidential Voting Data

```{r}

# ICPSR voter file - obtaining turnout data for presidential elections, 1924-1940
pres_vote24_40 <- read_dta( "data_to_import/icpsr.dta" ) %>%
  dplyr::select( sticpsr = V1, name = V2, ctyicpsr = V3,
                 dem_pctp1924 = V452, rep_pctp1924 = V453, tot_votes1924 = V456,
                 turnout1924 = V457,
                 dem_pctp1928 = V477, rep_pctp1928 = V478, tot_votes1928 = V481,
                 turnout1928 = V482,
                 dem_pctp1932 = V498, rep_pctp1932 = V499, tot_votes1932 = V502,
                 turnout1932 = V503,
                 dem_pctp1936 = V521, rep_pctp1936 = V522, tot_votes1936 = V525,
                 turnout1936 = V526,
                 dem_pctp1940 = V546, rep_pctp1940 = V547, tot_votes1940 = V550,
                 turnout1940 = V551 ) %>%
  mutate( sticpsr = as.numeric( sticpsr ) ) %>%
  left_join( icpsr_merge, by = c( "sticpsr", "ctyicpsr" ) ) %>%
  gather( "variable", "value", -fips, -stpc, -sticpsr, -name.x, -name.y, -ctyicpsr ) %>%
  separate( variable, c( "variable", "year" ), -4, convert = TRUE ) %>%
  spread( variable, value ) %>% 
  filter( dem_pctp < 999.9 ) %>%
  mutate( year = as.numeric( year ) ) %>%
  select( fips, stpc, year, dem_pctp, rep_pctp, tot_votes, turnout ) 

pres_vote32_40 <- filter( pres_vote24_40, year >= 1932 )


```

## ICPSR Voting Data 1900-1990

```{r}
# County id crosswalk
full_merge <- read_csv( "data_to_import/full_merge.csv" )

# Crosswalk from ICPSR to FIPS for states
state_ids <- read_dta( "data_to_import/id_icpsr_fips.dta" ) %>%
  select( ICPSRSTATE, FIPSTATE ) %>%
  distinct()

# ICPSR voting data file
icpsr_data00_90 <- read_dta( "data_to_import/1900-1990_county_elections_long.dta" ) %>%
  rename( name = county_name, ICPSRSTATE = icpr_state_code ) %>%
  left_join( state_ids, by = "ICPSRSTATE" ) %>%
  rename( statefip = FIPSTATE ) %>%
  mutate( statefip = as.numeric( statefip ) ) %>%
  left_join( full_merge, by = c( "statefip", "name" ) )

# Senate voting turnout data, 1924-1940
state_data_sen20_40 <- icpsr_data00_90 %>%
  mutate( no_sen_year = ifelse( is.na( sen_tot_ ) & is.na( sen_dem_t ) &
                                  is.na( sen_rep_t ), 1, 0 ) ) %>%
  filter( no_sen_year != 1, sen_tot_ != 9999999, sen_dem_t != 9999999,
          year %in% c( 1924, 1926, 1928, 1930, 1932, 1934, 1936, 1938, 1940 ) ) %>%
  select( fips, year, stpc, dem_votess = sen_dem_t, dem_pcts = sen_dem_p,
          rep_votess = sen_rep_t, rep_pcts = sen_rep_p, tot_votess = sen_tot_ ) %>%
  mutate( dem_pcts = dem_pcts * 100, rep_pcts = rep_pcts * 100 )
  # Note: Senate data is missing Dade County (Miami), FL

```

## Representation Data

### Party Representatives - Governor and Congress

```{r}

# Congressional party affiliation
cong_rep <- read_tsv( "data_to_import/icpsr_reps/icpsr_reps.tsv",
                      col_select = c( "STALPHA", "STATE", "MCNAME", "CONGRESS",
                                      "DIST", "PARTY" ) ) %>%
  filter( CONGRESS %in% c( 72, 73, 74, 75, 76 ) ) %>%
  # Party - 1 independent, 35 reps from progressive and farmer-labor
  mutate( PARTY = ifelse( PARTY == 100, "DEM",
                          ifelse( PARTY == 200, "REP",
                                  ifelse( PARTY == 328, "IND", "LEFT" ) ) ) )

# ICPSR state to FIPS state
sticpsr_stfips <- read_dta( "data_to_import/id_icpsr_fips.dta",
                           col_select = c( "FIPSTATE", "ICPSRSTATE" ) ) %>%
  distinct()

# Senatorial party affiliation
sen_rep <- cong_rep %>%
  filter( DIST == 0 ) %>%
  group_by( STATE, CONGRESS ) %>%
  summarize( NUM_DEM = ifelse( sum( PARTY == "DEM" ) > 2, 2, sum( PARTY == "DEM" ) ),
             NUM_LEFT = ifelse( sum( PARTY == "LEFT" ) > 2, 2,
                                sum( PARTY == "LEFT" ) ) ) %>%
  left_join( sticpsr_stfips, by = c( "STATE" = "ICPSRSTATE" ) ) %>%
  ungroup()

# Case of house of representatives elected in statewide elections
statewide_rep <- cong_rep %>%
  filter( DIST %in% c( 95, 96, 97, 98 ) ) %>%
  group_by( STATE, CONGRESS ) %>%
  summarize( NUM_DEM = sum( PARTY == "DEM" ),
             NUM_LEFT = sum( PARTY == "LEFT" ) ) %>%
  left_join( sticpsr_stfips, by = c( "STATE" = "ICPSRSTATE" ) ) %>%
  ungroup()

# House of representatives elected by w/in state district
housedist_rep <- cong_rep %>%
  filter( !( DIST %in% c( 0, 95, 96, 97, 98 ) ) ) %>%
  group_by( STATE, DIST, CONGRESS ) %>%
  summarize( NUM_DEM = sum( PARTY == "DEM" ),
             NUM_LEFT = sum( PARTY == "LEFT" ) ) %>%
  left_join( sticpsr_stfips, by = c( "STATE" = "ICPSRSTATE" ) ) %>%
  ungroup()

# Governor party affiliation
gov_rep <- read_csv( "./data_to_import/icpsr_reps/governors_1775_2020.csv" ) %>%
  filter( year >= 1932, year <= 1940 ) %>%
  # Some independents, and governors from progressive parties
  mutate( PARTY = ifelse( party == "Democrat", "DEM",
                    ifelse( party == "Republican", "REP",
                      ifelse( party == "Independent", "IND", "LEFT" ) ) ) ) %>%
  left_join( sticpsr_stfips, by = c( "sticpsr" = "ICPSRSTATE" ) ) %>%
  ungroup()

```

### Political Data by Congressional District

```{r}

# Data computed by each Congress. Calculate the number of party-affiliated 
# representatives and malapportionment of the county district, 72-76th Congresses
# See Berry et al. and Ansolabehere et al.
# This portion of code reads and organizes the data by each Congress.

# 72nd Congress
state_reps72 <- read_csv("data_to_import/county_to_cd/Crosswalk_1920_72.csv" ) %>%
  distinct( cd_statefip, id ) %>%
  group_by( cd_statefip ) %>%
  summarize( state_reps = n() ) %>%
  rename( state_fips = cd_statefip ) %>%
  mutate( tot_natl_reps = sum( state_reps, na.rm = TRUE ) ) %>%
  left_join( select( filter( statewide_rep, CONGRESS == 72 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE" ) )
  
county_rep72 <- read_csv("data_to_import/county_to_cd/Crosswalk_1920_72.csv") %>%
  dplyr::select( gisjoin_1920, state_fips = cd_statefip, id, cd_area, cnty_area,
                 m1_weight, district ) %>%
  left_join( select( filter( housedist_rep, CONGRESS == 72 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE", "district" = "DIST" ) ) %>%
  mutate( NUM_DEM = replace_na( NUM_DEM, 0 ),
          NUM_LEFT = replace_na( NUM_LEFT, 0 ),
          cnty_area_in_cd = m1_weight * cnty_area,
          cnty_area_prop_cd = cnty_area_in_cd / cd_area,
          NUM_DEM = NUM_DEM * cnty_area_prop_cd,
          NUM_LEFT = NUM_LEFT * cnty_area_prop_cd ) %>%
  group_by( state_fips, gisjoin_1920 ) %>%
  summarize( tot_rep_cnty = sum( cnty_area_prop_cd, na.rm = TRUE ),
             prop_demrep_cnty = sum( NUM_DEM, na.rm = T ),
             prop_leftrep_cnty = sum( NUM_LEFT, na.rm = T ) ) %>%
  separate( gisjoin_1920, c( "other", "fips" ), -4 )%>%
  separate( fips, c( "fips", "other" ), -1 ) %>%
  mutate( fips = as.numeric( paste0( state_fips, fips ) ) ) %>%
  filter( other == 0 ) %>%
  dplyr::select( -other ) %>%
  left_join( dplyr::select( demog1930, fips, tot_pop1930 ), by = "fips" ) %>%
  left_join( state_reps72, by = "state_fips" ) %>%
  ungroup() %>%
  mutate( avg_county_rep = tot_rep_cnty / tot_pop1930 ) %>%
  group_by( state_fips ) %>%
  mutate( state_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_rep_state = state_reps / state_pop ) %>%
  ungroup() %>%
  mutate( tot_country_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_natl_rep = tot_natl_reps / tot_country_pop,
          rep_index_natl72 = avg_county_rep / avg_natl_rep,
          rep_index_state72 = avg_county_rep / avg_rep_state,
          # Adjusting for statewide house offices
          NUM_DEM_HOUSE = replace_na( NUM_DEM, 0 ),
          NUM_LEFT_HOUSE = replace_na( NUM_LEFT, 0 ),
          prop_demrep_cnty72 = prop_demrep_cnty +
            NUM_DEM_HOUSE * ( tot_pop1930 / state_pop ),
          prop_leftrep_cnty72 = prop_leftrep_cnty +
            NUM_LEFT_HOUSE * ( tot_pop1930 / state_pop )) %>%
  dplyr::select( state_fips, fips, avg_county_rep72 = avg_county_rep, rep_index_natl72,
                 rep_index_state72, prop_demrep_cnty72, prop_leftrep_cnty72 )

# 73rd Congress
state_reps73 <- read_csv( "data_to_import/county_to_cd/Crosswalk_1930_73.csv" ) %>%
  distinct( cd_statefip, id ) %>%
  group_by( cd_statefip ) %>%
  summarize( state_reps = n() ) %>%
  rename( state_fips = cd_statefip ) %>%
  mutate( tot_natl_reps = sum( state_reps, na.rm = TRUE ) ) %>%
  left_join( select( filter( statewide_rep, CONGRESS == 73 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE" ) )

county_rep73 <- read_csv( "data_to_import/county_to_cd/Crosswalk_1930_73.csv" ) %>%
  dplyr::select( gisjoin_1930, state_fips = cd_statefip, id, cd_area, cnty_area,
                 m1_weight, district ) %>%
  left_join( select( filter( housedist_rep, CONGRESS == 73 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE", "district" = "DIST" ) ) %>%
  mutate( NUM_DEM = replace_na( NUM_DEM, 0 ),
          NUM_LEFT = replace_na( NUM_LEFT, 0 ),
          cnty_area_in_cd = m1_weight * cnty_area,
          cnty_area_prop_cd = cnty_area_in_cd / cd_area,
          NUM_DEM = NUM_DEM * cnty_area_prop_cd,
          NUM_LEFT = NUM_LEFT * cnty_area_prop_cd ) %>%
  group_by( state_fips, gisjoin_1930 ) %>%
  summarize( tot_rep_cnty = sum( cnty_area_prop_cd, na.rm = TRUE ),
             prop_demrep_cnty = sum( NUM_DEM, na.rm = T ),
             prop_leftrep_cnty = sum( NUM_LEFT, na.rm = T ) ) %>%
  separate( gisjoin_1930, c( "other", "fips" ), -4 )%>%
  separate( fips, c( "fips", "other" ), -1 ) %>%
  mutate( fips = as.numeric( paste0( state_fips, fips ) ) ) %>%
  filter( other == 0 ) %>%
  dplyr::select( -other ) %>%
  left_join( dplyr::select( demog1930, fips, tot_pop1930 ), by = "fips" ) %>%
  left_join( state_reps73, by = "state_fips" ) %>%
  ungroup() %>%
  mutate( avg_county_rep = tot_rep_cnty / tot_pop1930 ) %>%
  group_by( state_fips ) %>%
  mutate( state_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_rep_state = state_reps / state_pop ) %>%
  ungroup() %>%
  mutate( tot_country_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_natl_rep = tot_natl_reps / tot_country_pop,
          rep_index_natl73 = avg_county_rep / avg_natl_rep,
          rep_index_state73 = avg_county_rep / avg_rep_state,
          NUM_DEM_HOUSE = replace_na( NUM_DEM, 0 ),
          NUM_LEFT_HOUSE = replace_na( NUM_LEFT, 0 ),
          prop_demrep_cnty73 = prop_demrep_cnty +
            NUM_DEM_HOUSE * ( tot_pop1930 / state_pop ),
          prop_leftrep_cnty73 = prop_leftrep_cnty +
            NUM_LEFT_HOUSE * ( tot_pop1930 / state_pop )) %>%
  dplyr::select( state_fips, fips, avg_county_rep73 = avg_county_rep, rep_index_natl73,
                 rep_index_state73, prop_demrep_cnty73, prop_leftrep_cnty73 )

# 74th Congress
state_reps74 <- read_csv("data_to_import/county_to_cd/Crosswalk_1930_74.csv") %>%
  distinct( cd_statefip, id ) %>%
  group_by( cd_statefip ) %>%
  summarize( state_reps = n() ) %>%
  rename( state_fips = cd_statefip ) %>%
  mutate( tot_natl_reps = sum( state_reps, na.rm = TRUE ) ) %>%
  left_join( select( filter( statewide_rep, CONGRESS == 74 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE" ) )

county_rep74 <- read_csv("data_to_import/county_to_cd/Crosswalk_1930_74.csv") %>%
  dplyr::select( gisjoin_1930, state_fips = cd_statefip, id, cd_area, cnty_area,
                 m1_weight, district ) %>%
  left_join( select( filter( housedist_rep, CONGRESS == 74 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE", "district" = "DIST" ) ) %>%
  mutate( NUM_DEM = replace_na( NUM_DEM, 0 ),
          NUM_LEFT = replace_na( NUM_LEFT, 0 ),
          cnty_area_in_cd = m1_weight * cnty_area,
          cnty_area_prop_cd = cnty_area_in_cd / cd_area,
          NUM_DEM = NUM_DEM * cnty_area_prop_cd,
          NUM_LEFT = NUM_LEFT * cnty_area_prop_cd ) %>%
  group_by( state_fips, gisjoin_1930 ) %>%
  summarize( tot_rep_cnty = sum( cnty_area_prop_cd, na.rm = TRUE ),
             prop_demrep_cnty = sum( NUM_DEM, na.rm = T ),
             prop_leftrep_cnty = sum( NUM_LEFT, na.rm = T ) ) %>%
  separate( gisjoin_1930, c( "other", "fips" ), -4 )%>%
  separate( fips, c( "fips", "other" ), -1 ) %>%
  mutate( fips = as.numeric( paste0( state_fips, fips ) ) ) %>%
  filter( other == 0 ) %>%
  dplyr::select( -other ) %>%
  left_join( dplyr::select( demog1930, fips, tot_pop1930 ), by = "fips" ) %>%
  left_join( state_reps74, by = "state_fips" ) %>%
  ungroup() %>%
  mutate( avg_county_rep = tot_rep_cnty / tot_pop1930 ) %>%
  group_by( state_fips ) %>%
  mutate( state_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_rep_state = state_reps / state_pop ) %>%
  ungroup() %>%
  mutate( tot_country_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_natl_rep = tot_natl_reps / tot_country_pop,
          rep_index_natl74 = avg_county_rep / avg_natl_rep,
          rep_index_state74 = avg_county_rep / avg_rep_state,
          NUM_DEM_HOUSE = replace_na( NUM_DEM, 0 ),
          NUM_LEFT_HOUSE = replace_na( NUM_LEFT, 0 ),
          prop_demrep_cnty74 = prop_demrep_cnty +
            NUM_DEM_HOUSE * ( tot_pop1930 / state_pop ),
          prop_leftrep_cnty74 = prop_leftrep_cnty +
            NUM_LEFT_HOUSE * ( tot_pop1930 / state_pop )) %>%
  dplyr::select( state_fips, fips, avg_county_rep74 = avg_county_rep, rep_index_natl74,
                 rep_index_state74, prop_demrep_cnty74, prop_leftrep_cnty74 )


# 75th Congress
state_reps75 <- read_csv("data_to_import/county_to_cd/Crosswalk_1930_75.csv") %>%
  distinct( cd_statefip, id ) %>%
  group_by( cd_statefip ) %>%
  summarize( state_reps = n() ) %>%
  rename( state_fips = cd_statefip ) %>%
  mutate( tot_natl_reps = sum( state_reps, na.rm = TRUE ) ) %>%
  left_join( select( filter( statewide_rep, CONGRESS == 75 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE" ) )

county_rep75 <- read_csv("data_to_import/county_to_cd/Crosswalk_1930_75.csv") %>%
  dplyr::select( gisjoin_1930, state_fips = cd_statefip, id, cd_area, cnty_area,
                 m1_weight, district ) %>%
  left_join( select( filter( housedist_rep, CONGRESS == 75 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE", "district" = "DIST" ) ) %>%
  mutate( NUM_DEM = replace_na( NUM_DEM, 0 ),
          NUM_LEFT = replace_na( NUM_LEFT, 0 ),
          cnty_area_in_cd = m1_weight * cnty_area,
          cnty_area_prop_cd = cnty_area_in_cd / cd_area,
          NUM_DEM = NUM_DEM * cnty_area_prop_cd,
          NUM_LEFT = NUM_LEFT * cnty_area_prop_cd ) %>%
  group_by( state_fips, gisjoin_1930 ) %>%
  summarize( tot_rep_cnty = sum( cnty_area_prop_cd, na.rm = TRUE ),
             prop_demrep_cnty = sum( NUM_DEM, na.rm = T ),
             prop_leftrep_cnty = sum( NUM_LEFT, na.rm = T ) ) %>%
  separate( gisjoin_1930, c( "other", "fips" ), -4 )%>%
  separate( fips, c( "fips", "other" ), -1 ) %>%
  mutate( fips = as.numeric( paste0( state_fips, fips ) ) ) %>%
  filter( other == 0 ) %>%
  dplyr::select( -other ) %>%
  left_join( dplyr::select( demog1930, fips, tot_pop1930 ), by = "fips" ) %>%
  left_join( state_reps75, by = "state_fips" ) %>%
  ungroup() %>%
  mutate( avg_county_rep = tot_rep_cnty / tot_pop1930 ) %>%
  group_by( state_fips ) %>%
  mutate( state_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_rep_state = state_reps / state_pop ) %>%
  ungroup() %>%
  mutate( tot_country_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_natl_rep = tot_natl_reps / tot_country_pop,
          rep_index_natl75 = avg_county_rep / avg_natl_rep,
          rep_index_state75 = avg_county_rep / avg_rep_state,
          NUM_DEM_HOUSE = replace_na( NUM_DEM, 0 ),
          NUM_LEFT_HOUSE = replace_na( NUM_LEFT, 0 ),
          prop_demrep_cnty75 = prop_demrep_cnty +
            NUM_DEM_HOUSE * ( tot_pop1930 / state_pop ),
          prop_leftrep_cnty75 = prop_leftrep_cnty +
            NUM_LEFT_HOUSE * ( tot_pop1930 / state_pop )) %>%
  dplyr::select( state_fips, fips, avg_county_rep75 = avg_county_rep, rep_index_natl75,
                 rep_index_state75, prop_demrep_cnty75, prop_leftrep_cnty75 )

# 76th Congress
state_reps76 <- read_csv("data_to_import/county_to_cd/Crosswalk_1930_76.csv") %>%
  distinct( cd_statefip, id ) %>%
  group_by( cd_statefip ) %>%
  summarize( state_reps = n() ) %>%
  rename( state_fips = cd_statefip ) %>%
  mutate( tot_natl_reps = sum( state_reps, na.rm = TRUE ) ) %>%
  left_join( select( filter( statewide_rep, CONGRESS == 76 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE" ) )

county_rep76 <- read_csv("data_to_import/county_to_cd/Crosswalk_1930_76.csv") %>%
  dplyr::select( gisjoin_1930, state_fips = cd_statefip, id, cd_area, cnty_area,
                 m1_weight, district ) %>%
  left_join( select( filter( housedist_rep, CONGRESS == 76 ), -CONGRESS, -STATE ),
             by = c( "state_fips" = "FIPSTATE", "district" = "DIST" ) ) %>%
  mutate( NUM_DEM = replace_na( NUM_DEM, 0 ),
          NUM_LEFT = replace_na( NUM_LEFT, 0 ),
          cnty_area_in_cd = m1_weight * cnty_area,
          cnty_area_prop_cd = cnty_area_in_cd / cd_area,
          NUM_DEM = NUM_DEM * cnty_area_prop_cd,
          NUM_LEFT = NUM_LEFT * cnty_area_prop_cd ) %>%
  group_by( state_fips, gisjoin_1930 ) %>%
  summarize( tot_rep_cnty = sum( cnty_area_prop_cd, na.rm = TRUE ),
             prop_demrep_cnty = sum( NUM_DEM, na.rm = T ),
             prop_leftrep_cnty = sum( NUM_LEFT, na.rm = T ) ) %>%
  separate( gisjoin_1930, c( "other", "fips" ), -4 )%>%
  separate( fips, c( "fips", "other" ), -1 ) %>%
  mutate( fips = as.numeric( paste0( state_fips, fips ) ) ) %>%
  filter( other == 0 ) %>%
  dplyr::select( -other ) %>%
  left_join( dplyr::select( demog1930, fips, tot_pop1930 ), by = "fips" ) %>%
  left_join( state_reps76, by = "state_fips" ) %>%
  ungroup() %>%
  mutate( avg_county_rep = tot_rep_cnty / tot_pop1930 ) %>%
  group_by( state_fips ) %>%
  mutate( state_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_rep_state = state_reps / state_pop ) %>%
  ungroup() %>%
  mutate( tot_country_pop = sum( tot_pop1930, na.rm = TRUE ),
          avg_natl_rep = tot_natl_reps / tot_country_pop,
          rep_index_natl76 = avg_county_rep / avg_natl_rep,
          rep_index_state76 = avg_county_rep / avg_rep_state,
          NUM_DEM_HOUSE = replace_na( NUM_DEM, 0 ),
          NUM_LEFT_HOUSE = replace_na( NUM_LEFT, 0 ),
          prop_demrep_cnty76 = prop_demrep_cnty +
            NUM_DEM_HOUSE * ( tot_pop1930 / state_pop ),
          prop_leftrep_cnty76 = prop_leftrep_cnty +
            NUM_LEFT_HOUSE * ( tot_pop1930 / state_pop )) %>%
  dplyr::select( state_fips, fips, avg_county_rep76 = avg_county_rep, rep_index_natl76,
                 rep_index_state76, prop_demrep_cnty76, prop_leftrep_cnty76 )
```

### Merged Data for Apportionment and Representation

```{r}

# Merging the above data to be year-by-year for all Congresses
cnty_apportion <- full_join( select( county_rep73, -state_fips ),
                             select( county_rep74, -state_fips ), by = "fips" ) %>%
  full_join( county_rep75, by = "fips" ) %>%
  left_join( ( sen_rep %>%
                 filter( CONGRESS %in% c( 73, 74, 75 ) ) %>%
                 group_by( FIPSTATE ) %>%
                 summarize( avg_demsen = mean( NUM_DEM ),
                            avg_leftsen = mean( NUM_LEFT + NUM_DEM ) ) %>%
                 ungroup() ), by = c( "state_fips" = "FIPSTATE" ) ) %>%
  left_join( ( gov_rep %>%
                 filter( year >= 1933, year <= 1938 ) %>%
                 group_by( FIPSTATE ) %>%
                 summarize( avg_demgov = mean( as.numeric( PARTY == "DEM" ) ),
                            avg_leftgov = mean( as.numeric( PARTY == "DEM" |
                                                              PARTY == "LEFT" ) ) ) %>%
                 ungroup() ), by = c( "state_fips" = "FIPSTATE" ) ) %>%
  mutate( avg_county_rep = ( avg_county_rep73 + avg_county_rep74 + avg_county_rep75 ) /
            3,
          rep_index_natl = ( rep_index_natl73 + rep_index_natl74 + rep_index_natl75 ) /
            3,
          rep_index_state = ( rep_index_state73 + rep_index_state74 +
                                rep_index_state75 ) / 3,
          avg_prop_demhouse = ( prop_demrep_cnty73 + prop_demrep_cnty74 +
                                  prop_demrep_cnty75 ) / 3,
          avg_prop_demlefthouse = ( prop_demrep_cnty73 + prop_leftrep_cnty73 +
                                      prop_demrep_cnty74 + prop_leftrep_cnty74 +
                                      prop_demrep_cnty75 + prop_leftrep_cnty75 ) / 3,
          avg_demrep = avg_prop_demhouse + avg_demgov + avg_demsen,
          avg_leftrep = avg_prop_demlefthouse + avg_leftgov + avg_leftsen ) %>%
  dplyr::select( fips, avg_county_rep, rep_index_natl, rep_index_state, avg_prop_demhouse,
                 avg_prop_demlefthouse, avg_demrep, avg_leftrep )

# Need governor data for political variables
state_data_gov <- icpsr_data00_90 %>%
  mutate( no_gov_year = ifelse( is.na( gov_tot_ ) & is.na( gov_dem_t ) &
                                 is.na( gov_rep_t ), 1, 0 ) ) %>%
  filter( no_gov_year != 1, gov_tot_ != 9999999, gov_dem_t != 9999999,
          year >=1920, year <= 1940 ) %>%
  select( fips, year, stpc, dem_votesg = gov_dem_t, dem_pctg = gov_dem_p,
          rep_votesg = gov_rep_t, rep_pctg = gov_rep_p, tot_votesg = gov_tot_ ) %>%
  mutate( dem_pctg = dem_pctg * 100, rep_pctg = rep_pctg * 100 )

# Computing political variables
apport_vars <- full_join( state_data_sen20_40, pres_vote24_40,
                           by = c( "fips", "year" ) ) %>%
  full_join( state_data_gov, by = c( "fips", "year" ) ) %>%
  filter( year >= 1922 & year <= 1928 ) %>%
  dplyr::select( fips, year, dem_pctp, dem_pcts, dem_pctg ) %>%
  gather( "variable", "value", -fips, -year ) %>%
  group_by( fips ) %>%
  summarize( dem_county_avg = mean( value, na.rm = TRUE ),
             sd_dem_avg = sd( value, na.rm = TRUE),
             dem_closeness = abs( dem_county_avg - 50 ) ) %>%
  left_join( cnty_apportion, by = "fips" )

# Counting representatives (Governors, Senate, House) for each county-by-year
cnty_demrep <- rbind( select( mutate( county_rep72, CONGRESS = 72 ), state_fips, fips,
                              CONGRESS, prop_demhouse = prop_demrep_cnty72 ),
                      select( mutate( county_rep73, CONGRESS = 73 ), state_fips, fips,
                              CONGRESS, prop_demhouse = prop_demrep_cnty73 ),
                      select( mutate( county_rep74, CONGRESS = 74 ), state_fips, fips,
                              CONGRESS, prop_demhouse = prop_demrep_cnty74 ),
                      select( mutate( county_rep75, CONGRESS = 75 ), state_fips, fips,
                              CONGRESS, prop_demhouse = prop_demrep_cnty75 ),
                      select( mutate( county_rep76, CONGRESS = 76 ), state_fips, fips,
                              CONGRESS, prop_demhouse = prop_demrep_cnty76 ) ) %>%
  select( fips, prop_demhouse, CONGRESS, state_fips ) %>%
  left_join( sen_rep, by = c( "state_fips" = "FIPSTATE", "CONGRESS" = "CONGRESS" ) ) %>%
  mutate( CONG_YR = 1,
          num_leftsen = NUM_LEFT + NUM_DEM ) %>%
  rename( num_demsen = NUM_DEM ) %>%
  select( -NUM_LEFT )

cnty_demrep <- rbind( cnty_demrep, mutate( cnty_demrep, CONG_YR = 2 ) ) %>%
  mutate( YEAR = CONGRESS * 2 + 1787 + CONG_YR ) %>%
  select( -CONG_YR ) %>%
  filter( YEAR >= 1932, YEAR <= 1940 ) %>%
  left_join( ( gov_rep %>%
                 select( FIPSTATE, year, PARTY ) %>%
                 group_by( FIPSTATE, year ) %>%
                 summarize( num_demgov = ifelse( sum( PARTY == "DEM" ) > 0, 1, 0 ),
                            num_leftgov = ifelse( sum( PARTY == "DEM" |
                                                         PARTY == "LEFT" ) > 0,
                                                  1, 0 ) ) %>%
                 ungroup() ),
             by = c( "state_fips" = "FIPSTATE", "YEAR" = "year" ) ) %>%
  mutate( num_demrep = prop_demhouse + num_demsen + num_demgov )

```


## Merge Data - Presidents

```{r}

# Region lists from census
northeast1 <- c( "CT", "ME", "MA", "NH", "RI", "VT" )
northeast2 <- c( "NY", "NJ", "PA" )
midwest1 <- c( "IN", "IL", "MI", "OH", "WI" )
midwest2 <- c( "IA", "KS", "MN", "MO", "NE", "ND", "SD" )
south1 <- c("DE", "DC", "FL", "GA", "MD", "NC", "SC", "VA", "WV")
south2 <- c( "AL", "KY", "MS", "TN" )
south3 <- c( "AR", "LA", "OK", "TX" )
mountain <- c( "AZ", "CO", "ID", "NM", "MT", "UT", "NV", "WY" )
pacific <- c( "AK", "CA", "HI", "OR", "WA" )

# Combine all data for presidential models
state_data_pres <- demog_1932_1940 %>%
  filter( year %in% c( 1932, 1936, 1940 ) ) %>%
  left_join( pres_vote32_40, by = c( "fips", "year" ) ) %>%
  left_join( state_spending_24_40, by = c( "stpc", "year" ) ) %>%
  left_join( machine_cities, by = "fips" ) %>%
  left_join( all_cities_1931, by ="fips" ) %>%
  left_join( cnty_demrep, by = c( "fips" = "fips", "year" = "YEAR" ) ) %>%
  mutate( machine_nd = ifelse( year <= 1935, firm1935,
                         ifelse( year >= 1936 & year < 1938, firm1936,
                           ifelse( year == 1938, firm1938,
                             ifelse( year >= 1939, firm1939, 0 ) ) ) ),
          machine_nd_all = ifelse( firm1935 == 1 & firm1936 == 1 & firm1938 == 1 &firm1939 ==1, 1, 0 ),
          machine_nd_any = any_machine,
          ally = ally1935,
          prev_mach = prior1935 ) %>%
  replace_na( list( machine_nd = 0, city_1931 = 0, ally = 0, prev_mach = 0, machine_nd_any = 0 ) ) %>%
  mutate( category = as.factor( ifelse( machine_nd == 1, "machine",
                                  ifelse( city_1931 == 1 & machine_nd == 0, "city",
                                    ifelse( city_1931 == 0 & machine_nd == 0,
                                            "other", "other" ) ) ) ),
         rural = ifelse( city_1931 == 1, 0, 1 ),
         region = ifelse( stpc %in% northeast1, "northeast1",
                    ifelse( stpc %in% northeast2, "northeast2",
                      ifelse( stpc %in% midwest1, "midwest1",
                        ifelse( stpc %in% midwest2, "midwest2",
                          ifelse( stpc %in% south1, "south1",
                            ifelse( stpc %in% south2, "south2",
                              ifelse( stpc %in% south3, "south3",
                                ifelse( stpc %in% mountain, "mountain",
                                  ifelse( stpc %in% pacific, "pacific",
                                          "other" ) ) ) ) ) ) ) ) ),
         other_spend_pc = grants_pc - works_grants_pc ) %>%
  distinct( fips, year, .keep_all = TRUE ) # Remove duplicates added through merges

# Note: ICPSR is missing data for 2 Baltimore and 3 St. Louis city elections
# Also missing Providence count and DC (bc DC was not a voting entity)

```


## Merge Data - Senate

```{r}

# Combine all data for senate models
state_data_sen <- icpsr_data00_90 %>%
  mutate( no_sen_year = ifelse( is.na( sen_tot_ ) & is.na( sen_dem_t ) &
                                  is.na( sen_rep_t ), 1, 0 ) ) %>%
  filter( no_sen_year != 1, sen_tot_ != 9999999, sen_dem_t != 9999999,
          year >= 1932 & year <= 1940, !is.na( fips ) ) %>%
  select( fips, year, stpc, dem_votess = sen_dem_t, dem_pcts = sen_dem_p,
          rep_votess = sen_rep_t, rep_pcts = sen_rep_p,
          tot_votess = sen_tot_ ) %>%
  mutate( dem_pcts = dem_pcts * 100, rep_pcts = rep_pcts * 100 ) %>%
  left_join( demog_1932_1940, by = c( "fips", "year" ) ) %>%
  dplyr::select( fips, stpc, year:white_pct, dem_votess:tot_votess ) %>%
  left_join( state_spending_24_40, by = c( "stpc", "year" ) ) %>%
  left_join( machine_cities, by = "fips" ) %>%
  left_join( all_cities_1931, by ="fips" ) %>%
  left_join( cnty_demrep, by = c( "fips" = "fips", "year" = "YEAR" ) ) %>%
  mutate( machine_nd = ifelse( year <= 1935, firm1935,
                         ifelse( year >= 1936 & year < 1938, firm1936,
                           ifelse( year == 1938, firm1938,
                             ifelse( year >= 1939, firm1939, 0 ) ) ) ),
          machine_nd_all = ifelse(firm1935 == 1 & firm1936 == 1 & firm1938 == 1 &firm1939 ==1, 1, 0),
          machine_nd_any = any_machine,
          ally = ally1935,
          prev_mach = prior1935 ) %>%
  replace_na( list( machine_nd = 0, city_1931 = 0, ally = 0, prev_mach = 0, machine_nd_any = 0 ) ) %>%
  mutate( category = as.factor( ifelse( machine_nd == 1, "machine",
                                  ifelse( city_1931 == 1 & machine_nd == 0, "city",
                                    ifelse( city_1931 == 0 & machine_nd == 0, "other",
                                            "other" ) ) ) ),
          rural = ifelse( city_1931 == 1, 0, 1 ),
          region = ifelse( stpc %in% northeast1, "northeast1",
                     ifelse( stpc %in% northeast2, "northeast2",
                       ifelse( stpc %in% midwest1, "midwest1",
                         ifelse( stpc %in% midwest2, "midwest2",
                           ifelse( stpc %in% south1, "south1",
                             ifelse( stpc %in% south2, "south2",
                               ifelse( stpc %in% south3, "south3",
                                 ifelse( stpc %in% mountain, "mountain",
                                   ifelse( stpc %in% pacific, "pacific",
                                           "other" ) ) ) ) ) ) ) ) ),
          pres_year = ifelse( year %in% c( 1932, 1936, 1940 ), 1, 0 ),
          other_spend_pc = grants_pc - works_grants_pc )  %>%
  filter( top_100_gibson == 1 ) %>%
  distinct( fips, year, dem_pcts, .keep_all = TRUE )

# Note:
# Missing: GA 32; IA 36; MN 36; NB 34; MD except 38; CA 36, 38, 40; OR 38; CO 32
# CA has data for 1938, but no data on dem vote share

```

# County-Level Data     

```{r}

# Correcting an odd case for merging
regions <- state_data_pres %>%
  distinct( fips, stpc, region ) %>%
  mutate( stpc = ifelse( fips == 44007, "RI", stpc ) )

# Data on county-level spending see Fishback and Kantor for abbreviations
county_spending1939 <- read_excel( "data_to_import/county_spend.xls" ) %>%
  dplyr::select( sticpsr = STATE, name = NAME, FERA, CWA, WPA, SSA = PUBASS, PWAF, PWANF2,
                 PRA, PBA, USHAH, FSARR2, AAA, NDEXP, RELIEF, PUBWOR, tot_pop = POP30,
                 PCTURB3, TAXRTP29, RTSAPC29, RLDF3329, PCTUNEM, MEAN9628, ROOSMMN2,
                 STD9632, PCTVT32 ) %>%
  left_join( dplyr::select( icpsr_merge, sticpsr, name, fips ),
             by = c( "sticpsr", "name" ) ) %>%
  mutate( fips = ifelse( sticpsr == 5 & name == "PROVIDENCE", 44007, fips ) ) %>%
  left_join( filter( demog_1932_1940, year == 1940 ), by = "fips" ) %>%
  left_join( filter( demog1930, !is.na( fips ) ), by = "fips" ) %>%
  left_join( demog1940, by = "fips" ) %>%
  distinct( fips, .keep_all = TRUE ) %>%
  mutate( total_spending = FERA + CWA + WPA + SSA + PWAF + PRA + PBA + AAA + FSARR2 +
            USHAH,
          total_spend_city = FERA + CWA + WPA + SSA, # to match city spending
          works_spend = FERA + CWA + WPA, 
          spend_per_cap = total_spending / tot_pop1930,
          wpa_per_cap = WPA / tot_pop1930,
          fera_per_cap = FERA / tot_pop1930,
          works_per_cap = works_spend / tot_pop1930,
          spend_per_cap_b = total_spend_city / tot_pop1930,
          unemp_change = unemp_rate1940 - unemp_rate1930 ) %>%
  left_join( apport_vars, by = "fips" ) %>%
  left_join( machine_cities, by = "fips" ) %>%
  left_join( all_cities_1931, by ="fips" ) %>%
  mutate( machine_nd = ifelse( firm1935 == 1, 1,
                              ifelse( firm1936 == 1, 1,
                              ifelse( firm1938 == 1, 1,
                              ifelse( firm1939 == 1, 1, 0 ) ) ) ) ) %>%
  left_join( regions, by = "fips" ) %>%
  replace_na( list( machine_nd = 0, city_1931 = 0 ) ) %>%
  mutate( category = as.factor( ifelse( machine_nd == 1, "machine",
                                  ifelse( city_1931 == 1 & machine_nd == 0, "city",
                                    ifelse( city_1931 == 0 & machine_nd == 0, "other",
                                           "other" ) ) ) ),
         rural = ifelse( city_1931 == 1, 0, 1 ),
         # census regions
         region = ifelse( stpc %in% northeast1, "northeast1",
                    ifelse( stpc %in% northeast2, "northeast2",
                      ifelse( stpc %in% midwest1, "midwest1",
                        ifelse( stpc %in% midwest2, "midwest2",
                          ifelse( stpc %in% south1, "south1",
                            ifelse( stpc %in% south2, "south2",
                              ifelse( stpc %in% south3, "south3",
                                ifelse( stpc %in% mountain, "mountain",
                                  ifelse( stpc %in% pacific, "pacific",
                                          "other" ) ) ) ) ) ) ) ) ) )

# Note: Data are missing St. Louis, MO b/c it is a separate city/county unit

```


# City-Level Data for Synthetic Control

```{r}

# Crosswalk file 
state_abbrev <- read_csv( "data_to_import/state_abbrev.csv" ) %>%
  select( state_name = State, stpc = Code )

# Compile city spending data
city_spending_1932_1935 <- read_excel( "data_to_import/cities_public_private_machine.xls" ) %>%
  dplyr::select( stpc = STATEABL, sticpsr = STATE, ctyicpsr = COUNTY,
                 citycode = CITYCODE, cnty_name = COUNNAME, city = CTNAMEP, machine,
                 year = YEAR, CTCOUNT:PRIVATE, POPEST ) %>%
  mutate( city = as.factor( city ),
         DIRREL = ifelse( year >= 1938 & is.na( DIRREL ), 0, DIRREL ),
         WORREL = ifelse( year >= 1938 & is.na( WORREL ), 0, WORREL ),
         pub_ast = CWA +  WPA + DIRREL + WORREL + OLDAGE + ADEPCH + BLIND ) %>%
  replace_na( list( machine = 0 ) ) %>%
  dplyr::select( stpc, cnty_name, city, machine, year, POPEST, pub_ast, DIRREL, WORREL,
                 CWA, WPA )  %>%
  filter(year >= 1933 & year <= 1935) %>%
  left_join( state_abbrev, by = "stpc" ) %>%
  mutate_each( funs( toupper ), state_name ) %>%
  left_join( all_cities_1931, by = c( "state_name", "city" ) ) %>%
  group_by( fips, year ) %>% # Filter all counties w/multiple cities
  mutate(n = n()) %>%
  filter( n == 1, top_100_gibson == 1 ) %>%
  ungroup() %>%
  group_by( stpc, cnty_name, city, fips ) %>%
  summarise( WORREL = mean( WORREL, na.rm = TRUE ),
             CWA = mean( CWA, na.rm = TRUE ),
             WPA = mean( WPA, na.rm = TRUE ),
             POPEST = mean( POPEST, na.rm =  TRUE ),
             avg_works_grants_pc33_35 = ( WORREL + CWA + WPA ) / POPEST ) %>%
  unite( "name", c( city, cnty_name ), sep = "_" )

# Note: Data is combined for Allentown/Bethlehem
# We've adjusted so that all the data go to Allentown
# Allentown is fully in Lehigh County, while Bethlehem is only partly in Lehigh
# This keeps the county-level data more honest
```

## Merging

```{r}

# Compile synthetic control data
synth_data <- pres_vote24_40  %>%
  dplyr::select( fips, year, dem_pctp ) %>%
  left_join( city_spending_1932_1935, by = "fips" ) %>%
  filter( !is.na( name ) ) %>%
  left_join( demog1930, by = "fips" ) %>%
  filter( !is.na( fips ), !is.na( name.x ),
         year %in% c( 1924, 1928, 1932, 1936, 1940 ) )  %>%
  left_join( dplyr::select( machine_cities, fips, any_machine:firm1939 ),
             by = "fips" ) %>%
  left_join( filter( cnty_demrep, YEAR == 1932 ), by = "fips" ) %>%
  mutate( machine_nd = ifelse( year <= 1935, firm1935,
                         ifelse( year >= 1936 & year < 1938, firm1936, 
                           ifelse( year == 1938, firm1938, 
                             ifelse( year >= 1939, firm1939, 0 ) ) ) ) ) %>%
  replace_na( list( any_machine = 0 ) ) %>%
  select( name = name.x, year, stpc, fips, dem_pctp, avg_works_grants_pc33_35,
          any_machine, num_demrep, tot_pop1930:forgn_white_pct1930 ) 

```



